package com.derbysoft.drc.billcheck

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by xinliwang on 2017/6/4.
  *
  * Spark job that reads a comma-separated text file, groups the values of the
  * third column (index 2) by the fourth column (index 3) and prints every key
  * that is associated with more than one distinct value.
  *
  * Each printed element is a tuple `(key, values)` where `values` is the
  * comma-joined list of all values seen for that key (duplicates included).
  *
  * NOTE(review): judging by the object and file names, the key is presumably a
  * reservation code and the value a hotel identifier - confirm against the
  * actual CSV layout.
  */
object HotelResCodeRepeatDetail {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath =
    "/Users/xinliwang/Downloads/HILTON_201705_booking_res_detail.csv"

  /**
    * Entry point.
    *
    * @param args optional; `args(0)` overrides the input file path. When absent,
    *             [[DefaultInputPath]] is read.
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("HotelReservationCodeDetail")
    val sc = new SparkContext(conf)
    try {
      val inputPath = args.headOption.getOrElse(DefaultInputPath)

      // Local copies so the closures shipped to tasks capture plain Ints only.
      val keyColumn = 3
      val valueColumn = 2

      val conflicting = sc.textFile(inputPath)
        // Limit -1 keeps trailing empty fields, so "a,b,," still yields 4 columns.
        .map(_.split(",", -1))
        // Skip blank/short lines instead of failing the whole job with
        // ArrayIndexOutOfBoundsException.
        .filter(_.length > keyColumn)
        .map(fields => (fields(keyColumn), fields(valueColumn)))
        .reduceByKey(_ + "," + _)
        // Limit -1 again: without it empty values at the end of the joined string
        // are dropped and the distinct count is wrong (e.g. "A," looked like one
        // value, "," looked like zero). A key seen once has no comma and therefore
        // exactly one element, so "> 1" also covers the single-occurrence case.
        .filter { case (_, joined) => joined.split(",", -1).distinct.length > 1 }

      conflicting.foreach(println)
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
